{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import \n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#importing the training dataset\n",
    "data=pd.read_csv('train.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Function to normalize the dataset\n",
    "def feature_normalize(dataset):\n",
    "    mu=np.mean(dataset,axis=0)\n",
    "    sigma=np.std(dataset,axis=0)\n",
    "    return (dataset-mu)/sigma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Function to convert string type data to integers\n",
    "\n",
    "def str_to_int(dataset):\n",
    "    string_columns=dataset.select_dtypes(['object']).columns\n",
    "    print(string_columns)\n",
    "    \n",
    "    for col in string_columns:\n",
    "        dataset[col]=dataset.astype('category')\n",
    "        \n",
    "    categorical_columns=dataset.select_dtypes(['category']).columns\n",
    "    dataset[categorical_columns]=dataset[categorical_columns].apply(lambda x:x.cat.codes)\n",
    "    return dataset\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def one_hot(df, cols):\n",
    "    \"\"\"\n",
    "    @param df pandas DataFrame\n",
    "    @param cols a list of columns to encode \n",
    "    @return a DataFrame with one-hot encoding\n",
    "    \"\"\"\n",
    "    for each in cols:\n",
    "        dummies = pd.get_dummies(df[each], prefix=each, drop_first=False)\n",
    "        del df[each]\n",
    "        df = pd.concat([df, dummies], axis=1)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Delete the following columns\n",
    "new_data=data.drop(labels=[\"Name\",\"Ticket\",\"Cabin\",\"Parch\"],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Preprocessing data\n",
    "\n",
    "def preprocessing(df):\n",
    "    \n",
    "    #Replace the nan with mean value\n",
    "    df[\"Age\"]=df.fillna(df[\"Age\"].mean(),inplace=True)\n",
    "    \n",
    "    #Onehot encoding Pcclass\n",
    "    df=one_hot(df,df.loc[:,[\"Pclass\"]].columns)\n",
    "    \n",
    "    #String to integer\n",
    "    df=str_to_int(df)\n",
    "    \n",
    "    #age normalization\n",
    "    df[\"Age\"]=feature_normalize(df[\"Age\"])\n",
    "    \n",
    "    #stats.describe(df).variance\n",
    "    \n",
    "    return df\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Sex', 'Age', 'Embarked'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "df_train=preprocessing(new_data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   PassengerId  Survived     Sex   Age  SibSp     Fare Embarked\n",
      "0            1         0    male  None      1   7.2500        S\n",
      "1            2         1  female  None      1  71.2833        C\n",
      "2            3         1  female  None      0   7.9250        S\n",
      "3            4         1  female  None      1  53.1000        S\n",
      "4            5         0    male  None      0   8.0500        S\n",
      "5            6         0    male  None      0   8.4583        Q\n",
      "6            7         0    male  None      0  51.8625        S\n",
      "7            8         0    male  None      3  21.0750        S\n",
      "8            9         1  female  None      0  11.1333        S\n",
      "9           10         1  female  None      1  30.0708        C\n"
     ]
    }
   ],
   "source": [
    "print(new_data.head(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(891, 5)\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "features=df_train.iloc[:,2:7]\n",
    "print(features.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(891, 8)\n"
     ]
    }
   ],
   "source": [
    "features=df_train.iloc[:,2:].values\n",
    "print(features.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(891, 1)\n"
     ]
    }
   ],
   "source": [
    "labels=df_train.iloc[:,:1].values\n",
    "print(labels.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Working with test data\n",
    "test_data=pd.read_csv(\"test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_test_data=test_data.drop(labels=[\"Name\",\"Ticket\",\"Cabin\",\"Parch\"],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Sex', 'Age', 'Embarked'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "test_features=preprocessing(new_test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   PassengerId     Sex   Age  SibSp     Fare Embarked\n",
      "0          892    male  None      0   7.8292        Q\n",
      "1          893  female  None      1   7.0000        S\n",
      "2          894    male  None      0   9.6875        Q\n",
      "3          895    male  None      0   8.6625        S\n",
      "4          896  female  None      1  12.2875        S\n",
      "5          897    male  None      0   9.2250        S\n",
      "6          898  female  None      0   7.6292        Q\n",
      "7          899    male  None      1  29.0000        S\n",
      "8          900  female  None      0   7.2292        C\n",
      "9          901    male  None      2  24.1500        S\n"
     ]
    }
   ],
   "source": [
    "print(new_test_data.head(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Encoding Sex data\n",
    "new_test_data['Sex'] = [1 if item == 'male' else 0 for item in new_test_data['Sex']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"None of [['Pclass']] are in the [columns]\"",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-20-0993d9cb4922>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m#one hot encoding Pclass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mnew_test_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mone_hot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_test_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mnew_test_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Pclass\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\ANACONDA\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   1470\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mKeyError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1471\u001b[0m                 \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1472\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1473\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1474\u001b[0m             \u001b[1;31m# we by definition only have the 0th axis\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ANACONDA\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m    888\u001b[0m                 \u001b[1;32mcontinue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    889\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 890\u001b[1;33m             \u001b[0mretval\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mretval\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    891\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    892\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ANACONDA\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1899\u001b[0m                     \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Cannot index with multidimensional key'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1900\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1901\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_iterable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1902\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1903\u001b[0m             \u001b[1;31m# nested tuple slicing\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ANACONDA\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_iterable\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1141\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_unique\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1142\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0max\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer_for\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1143\u001b[1;33m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_read_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1144\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1145\u001b[0m                 \u001b[0md\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ANACONDA\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_validate_read_indexer\u001b[1;34m(self, key, indexer, axis)\u001b[0m\n\u001b[0;32m   1204\u001b[0m                 raise KeyError(\n\u001b[0;32m   1205\u001b[0m                     u\"None of [{key}] are in the [{axis}]\".format(\n\u001b[1;32m-> 1206\u001b[1;33m                         key=key, axis=self.obj._get_axis_name(axis)))\n\u001b[0m\u001b[0;32m   1207\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1208\u001b[0m             \u001b[1;31m# we skip the warning on Categorical/Interval\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: \"None of [['Pclass']] are in the [columns]\""
     ]
    }
   ],
   "source": [
    "#one hot encoding Pclass\n",
    "new_test_data=one_hot(new_test_data,new_test_data.loc[:,[\"Pclass\"]].columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtest=new_test_data.iloc[:,1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtest=Xtest.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(418, 5)\n"
     ]
    }
   ],
   "source": [
    "print(Xtest.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8 1\n"
     ]
    }
   ],
   "source": [
    "feature_count=features.shape[1]\n",
    "label_count=labels.shape[1]\n",
    "print(feature_count, label_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "#defining the inputs \n",
    "epochs=2000\n",
    "learning_rate=0.01\n",
    "hidden_layers=feature_count-1\n",
    "cost_history = np.empty(shape=[1],dtype=float)\n",
    "\n",
    "X = tf.placeholder(tf.float32,[None,feature_count])\n",
    "Y = tf.placeholder(tf.float32,[None,label_count])\n",
    "hidden_layers = feature_count - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "#defining the model\n",
    "\n",
    "initializer=tf.contrib.layers.xavier_initializer()\n",
    "\n",
    "h0=tf.layers.dense(X,hidden_layers,activation=tf.nn.relu,kernel_initializer=initializer) #defining the model\n",
    "\n",
    "h1 = tf.layers.dense(h0, label_count, activation=None) #defining the output layer\n",
    "\n",
    "cross_entropy=tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=h1)\n",
    "\n",
    "cost=tf.reduce_mean(cross_entropy)\n",
    "\n",
    "optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n",
    "\n",
    "predicted=tf.nn.sigmoid(h1)\n",
    "correct_pred=tf.equal(tf.round(predicted),Y)\n",
    "accuracy=tf.reduce_mean(tf.cast(correct_pred,tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step:     0\tLoss: 73618.055\tAcc: 0.11%\n",
      "Step:   100\tLoss: -24497564.000\tAcc: 0.11%\n",
      "Step:   200\tLoss: -110411000.000\tAcc: 0.11%\n",
      "Step:   300\tLoss: -252314672.000\tAcc: 0.11%\n",
      "Step:   400\tLoss: -445811840.000\tAcc: 0.11%\n",
      "Step:   500\tLoss: -687261952.000\tAcc: 0.11%\n",
      "Step:   600\tLoss: -973527232.000\tAcc: 0.11%\n",
      "Step:   700\tLoss: -1301880320.000\tAcc: 0.11%\n",
      "Step:   800\tLoss: -1669945984.000\tAcc: 0.11%\n",
      "Step:   900\tLoss: -2075652992.000\tAcc: 0.11%\n",
      "Step:  1000\tLoss: -2517200128.000\tAcc: 0.11%\n",
      "Step:  1100\tLoss: -2993018112.000\tAcc: 0.11%\n",
      "Step:  1200\tLoss: -3501743104.000\tAcc: 0.11%\n",
      "Step:  1300\tLoss: -4042188032.000\tAcc: 0.11%\n",
      "Step:  1400\tLoss: -4613324800.000\tAcc: 0.11%\n",
      "Step:  1500\tLoss: -5214258688.000\tAcc: 0.11%\n",
      "Step:  1600\tLoss: -5844214784.000\tAcc: 0.11%\n",
      "Step:  1700\tLoss: -6502520320.000\tAcc: 0.11%\n",
      "Step:  1800\tLoss: -7188591104.000\tAcc: 0.11%\n",
      "Step:  1900\tLoss: -7901918208.000\tAcc: 0.11%\n",
      "Step:  2000\tLoss: -8642062336.000\tAcc: 0.11%\n"
     ]
    }
   ],
   "source": [
    "#Running the Session\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "    \n",
    "    for step in range (epochs+1):\n",
    "        sess.run(optimizer,feed_dict={X:features, Y:labels})\n",
    "        \n",
    "        loss,_,acc=sess.run([cost ,optimizer , accuracy], feed_dict={X:features, Y:labels})\n",
    "        \n",
    "        cost_history=np.append(cost_history,acc)\n",
    "        if step%100==0:\n",
    "            print(\"Step: {:5}\\tLoss: {:.3f}\\tAcc: {:.2%}\".format(step, loss, acc))\n",
    "                \n",
    "    #print('Test Accuracy:', sess.run([accuracy, tf.round(predicted)], feed_dict={X: test_x, Y: test_y}))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
